data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")
# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)
print(data)
# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")
Summary statistics
Histogram
Density
Box_Plot
qq_Plot
Shapiro-Wilk normality test
I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.
data$Categorical <- ifelse(data$T4Cortisol >= 956, "H",
ifelse(data$T4Cortisol <= 190.8, "L", "M"))
table(data$Categorical)
library(ggplot2)
# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))
# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
geom_bar() +
labs(title = "Histogram of T4Cortisol by Category",
x = "Category",
y = "Frequency") +
theme_minimal()
# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
labs(title = "Histogram of T4Cortisol with Color by Category",
x = "T4 Cortisol",
y = "Frequency",
fill = "Category") +
scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
theme_minimal()
# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
geom_density(alpha = 0.3) +
labs(title = "Density Plot of T4Cortisol with Color by Category",
x = "T4Cortisol",
y = "Density",
fill = "Category") +
scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
theme_minimal()
# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
geom_density() +
geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
labs(title = "Density Plot of T4Cortisol with Vertical Lines",
x = "T4Cortisol",
y = "Density") +
theme_minimal()
The animals were sorted in these three categories >H = Hight >M = Medium >L = Low
The individuals frequency distribution in theese categories are shown in the plots below
Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.
library(tidyverse)
data_no_out <- filter(data, T4Cortisol < 1250)
# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")
boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
shapiro.test(data$T4Cortisol)
Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno
In this folder we found the following files:
I made a copy of this files in a folder called Raw_files:
/home/bambrozi/2_CORTISOL/RawFiles
This directory has two sub-directories:To perfome this I used two codes
sed -i '1i id Call' genotypes.txt
filename = 'bruno_gntps.txt'
outputFileOpen = open('genoplink.ped','w')
recode = {'0':['C','C'] , '1':['A','C'] , '2':['A','A'] , '5':['0','0'] }
for line in open(filename,'r'):
if 'Call' in line : continue
ids, call = line.strip().split()
genotypes = [ recode[geno012][0] +' '+ recode[geno012][1] for geno012 in call ]
outputFileOpen.write("%s %s %s %s %s %s %s\n" % ('HO', ids, '0','0','0','-9',' '.join(genotypes)) )
Now I have a folder called /home/bambrozi/2_CORTISOL/Geno_files with the file named genoplink.ped
library(data.table)
pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]
#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)
#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,]
#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)
The phenotype file should have three columns: FID, Animal_id, Phenotype
HO <- rep("HO", 252)
pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))
colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")
pheno_gwas$cdn_id <- as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)
write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)
map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
We ran the code below to perfom the QC
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
/home/local/bin/plink \
--bfile ${DATA} \
--cow \
--allow-no-sex \
--hwe 1e-5 \
--maf 0.01 \
--geno 0.1 \
--mind 0.1 \
--keep-allele-order \
--make-bed \
--out ${RESULT}
The server screen outcome is shown below.
After the Quality Control we ended up with
To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink.
#!/bin/bash
DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/ONLY_GRASSHILL_AND_PHENO_after_QC/only_grasshill_and_pheno_clean
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/KING/result_king
plink2 \
--bfile ${DATA} \
--chr-set 29 \
--make-king-table \
--out ${RESULT}
This is the output screen on terminal:
The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.
Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga:
setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")
relatedness="result_king.kin0" ## change accordingly!!
library(data.table)
print(relatedness)
rel=fread(relatedness, h = T)
head(rel)
print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354 & IID1!=IID2)
print(dup)
nrow(dup)
So the code above will provide this output if there is any duplicated individual.
We do not have any duplicated individual
So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC
files:genoplink_clean
After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file
Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.
#!/bin/bash
DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/imput_pca
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/pca_result
plink \
--bfile ${DATA} \
--keep-allele-order \
--chr-set 29 \
--pca \
--out ${RESULT}
The PCA output:
After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot
setwd("/home/bambrozi/2_CORTISOL/PCA")
library(ggplot2)
library(tidyverse)
##
# Visualize PCA results
###
# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)
## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)
# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
geom_hline(yintercept = 0, linetype="dotted") +
geom_vline(xintercept = 0, linetype="dotted") +
labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
theme_minimal()
dev.off()
# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) + # Add labels for animal IDs
geom_hline(yintercept = 0, linetype="dotted") +
geom_vline(xintercept = 0, linetype="dotted") +
labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
theme_minimal()
dev.off()
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt
/home/local/bin/gcta \
--bfile ${DATA} \
--mlma-loco \
--pheno ${PHENO} \
--autosome-num 29 \
--autosome \
--out ${RESULT}
This is te output from the code above:
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/nrow(gwas))
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni correction"), bty="n")
The code above creates a Manhattam Plot, the correction for multiple test is the Bonferroni correction
The code below will save information of Significant SNP.
library(tidyverse)
SNP_sig_bonf <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
Below we can see the 1 significant SNP for Bonferroni correction.
gwas$bh <- p.adjust(gwas$p, method = "BH") #FDR
The code below will create a file with the significant snp for FDR-BH
SNP_sig_BH <- filter(gwas, bh < 0.05)
write.table(SNP_sig_BH, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_BH.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
As outcome we can see that the FDR-BH method didn’t increase the number of significant SNP.
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
Below we can find the list of significant SNPs after the correction for Multiple Testing
I performed the qqPlot analysis using the code below:
gwas<- read.table("GWAS_result.loco.mlma", h=T)
ps<- gwas$p
inflation <- function(ps) {
chisq <- qchisq(1 - ps, 1)
lambda <- median(chisq) / qchisq(0.5, 1)
lambda
}
# Calculating the lambda - the lambda statistic should be close to 1 if
#the points fall within the expected range, or greater than one if
# the observed p-values are more significant than expected.
inflation(ps)
bonf<- -log10(0.05/nrow(gwas))
gwas$log<- -log10(gwas$p)
gg_qqplot <- function(ps, ci = 0.95) {
n <- length(ps)
df <- data.frame(
observed = -log10(sort(ps)),
expected = -log10(ppoints(n)),
clower = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n, shape2 = n:1)),
cupper = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n, shape2 = n:1))
)
log10Pe <- expression(paste("Expected -log"[10], plain(P)))
log10Po <- expression(paste("Observed -log"[10], plain(P)))
ggplot(df) +
geom_ribbon(
mapping = aes(x = expected, ymin = clower, ymax = cupper),
alpha = 0.1
) +
geom_point(aes(expected, observed), shape = 1, size = 3) +
geom_abline(intercept = 0, slope = 1, alpha = 0.5) +
# geom_line(aes(expected, cupper), linetype = 2, size = 0.5) +
# geom_line(aes(expected, clower), linetype = 2, size = 0.5) +
xlab(log10Pe) +
ylab(log10Po)
}
## plot -----
gg_qqplot(ps) +
theme_bw(base_size = 24) +
annotate(
geom = "text",
x = -Inf,
y = Inf,
hjust = -0.15,
vjust = 1 + 0.15 * 3,
label = sprintf("λ = %.2f", inflation(ps)),
size = 8
) +
theme(
axis.ticks = element_line(size = 0.5),
panel.grid = element_blank()
# panel.grid = element_line(size = 0.5, color = "grey80")
)
The outcome is the qqplot below, and the lambda value is \(\lambda\) = 1.03
After insert all rsID on VEP the summary is shown below:
Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:
rs42217767 ps.
this variant wasn’t recovered automaticaly by VEP, so I typed the chr
and position on the search area of the “genome viewer”
rs110031217
rs110991998
rs42089058
rs41644634
rs41567074
ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt",
head=T, stringsAsFactors = F)
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k_clean.csv")
Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html
Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 11 | BTB-01060598 | 32301594 | 11 | 32278324 | 32766620 | 488297 | - | ENSBTAG00000046199 | NRXN1 | protein_coding |
| 2 | 11 | BTB-01060598 | 32301594 | 11 | 32280416 | 32280512 | 97 | - | ENSBTAG00000044573 | U6 | snRNA |
| 3 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 45924535 | 46818511 | 893977 | - | ENSBTAG00000013117 | CACNA2D3 | protein_coding |
| 4 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 46051998 | 46062657 | 10660 | + | ENSBTAG00000013124 | LRTM1 | protein_coding |
| 5 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
| 6 | 26 | BTB-00926636 | 17857480 | 26 | 17703169 | 17846407 | 143239 | - | ENSBTAG00000011743 | TLL2 | protein_coding |
| 7 | 26 | BTB-00926636 | 17857480 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 8 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 9 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17925643 | 18029878 | 104236 | - | ENSBTAG00000019872 | PIK3AP1 | protein_coding |
| 10 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41609015 | 41649016 | 40002 | - | ENSBTAG00000007540 | GLUD1 | protein_coding |
| 11 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41650333 | 41763371 | 113039 | + | ENSBTAG00000019194 | SHLD2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 2 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
| 4 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51802 |
| 5 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51803 |
| 6 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51804 |
| 7 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51805 |
| 8 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51806 |
| 9 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51807 |
| 10 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51808 |
| 11 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51809 |
| 12 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Health | 46052080 | 46052084 | 51810 |
| 13 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51811 |
| 14 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51812 |
| 15 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51813 |
| 16 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Milk | 46073517 | 46073521 | 243365 |
| 17 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 195264 |
| 18 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200269 |
| 19 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200270 |
| 20 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 205295 |
| 21 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 198859 |
| 22 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203868 |
| 23 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203869 |
| 24 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 210297 |
| 25 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 198858 |
| 26 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203866 |
| 27 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203867 |
| 28 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 210296 |
| 29 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 197493 |
| 30 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 202704 |
| 31 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 195685 |
| 32 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 200785 |
| 33 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 195285 |
| 34 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 200301 |
| 35 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 196236 |
| 36 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 201468 |
| 37 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 198002 |
| 38 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 203190 |
| 39 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 34750 |
| 40 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 195586 |
| 41 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 197235 |
| 42 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 202455 |
| 43 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 197783 |
| 44 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 202983 |
| 45 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 196204 |
| 46 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 201419 |
| 47 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 196867 |
| 48 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 202101 |
| 49 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 195748 |
| 50 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 200855 |
| 51 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 206213 |
| 52 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 197144 |
| 53 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 197144 |
| 54 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 202367 |
| 55 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 202367 |
| 56 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 208485 |
| 57 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 208485 |
| 58 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 197611 |
| 59 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 197611 |
| 60 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 202816 |
| 61 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 202816 |
| 62 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 209076 |
| 63 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 209076 |
| 64 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 63656 |
| 65 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 63656 |
| 66 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 199068 |
| 67 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 199068 |
| 68 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204113 |
| 69 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204113 |
| 70 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204114 |
| 71 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204114 |
| 72 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 210399 |
| 73 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 210399 |
| 74 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 63657 |
| 75 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 63657 |
| 76 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 199069 |
| 77 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 199069 |
| 78 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204115 |
| 79 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204115 |
| 80 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204116 |
| 81 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204116 |
| 82 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 210400 |
| 83 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 210400 |
| 84 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 199070 |
| 85 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 199070 |
| 86 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204117 |
| 87 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204117 |
| 88 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204118 |
| 89 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204118 |
| 90 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 210401 |
| 91 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 210401 |
| 92 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 199071 |
| 93 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 199071 |
| 94 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204119 |
| 95 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204119 |
| 96 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204120 |
| 97 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204120 |
| 98 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 210402 |
| 99 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 210402 |
| 100 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 63658 |
| 101 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 63658 |
| 102 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 199072 |
| 103 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 199072 |
| 104 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204121 |
| 105 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204121 |
| 106 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204122 |
| 107 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204122 |
| 108 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 210403 |
| 109 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 210403 |
| 110 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 63660 |
| 111 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 63660 |
| 112 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 199074 |
| 113 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 199074 |
| 114 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204125 |
| 115 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204125 |
| 116 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204126 |
| 117 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204126 |
| 118 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 210405 |
| 119 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 210405 |
| 120 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 63654 |
| 121 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 199076 |
| 122 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204129 |
| 123 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204130 |
| 124 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 210407 |
| 125 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 63655 |
| 126 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 197826 |
| 127 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203026 |
| 128 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203027 |
| 129 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 209354 |
| 130 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 195860 |
| 131 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201008 |
| 132 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201009 |
| 133 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 206462 |
| 134 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 33984 |
| 135 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195668 |
| 136 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195669 |
| 137 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195670 |
| 138 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 200758 |
| 139 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 199092 |
| 140 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 204151 |
| 141 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107048 |
| 142 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107227 |
QTL type
#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)
QTL type
#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")
Reproduction
Health
#GALLO
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_name.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_type.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 5 | Milk C14 index | 35 | 4437 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 9 | Milk myristoleic acid content | 26 | 3047 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 10 | Milk palmitoleic acid content | 15 | 2422 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 6 | Milk C16 index | 12 | 2002 | 108 | 163224 | 0.0000000 | 0.0000001 | Milk |
| 12 | Muscle carnosine content | 3 | 67 | 108 | 163224 | 0.0000131 | 0.0000497 | Meat and Carcass |
| 14 | Pregnancy rate | 2 | 944 | 108 | 163224 | 0.1296627 | 0.4105985 | Reproduction |
| 18 | Teat length | 1 | 300 | 108 | 163224 | 0.1802436 | 0.4892327 | Exterior |
| 15 | Rear leg placement - side view | 1 | 430 | 108 | 163224 | 0.2479752 | 0.5235032 | Exterior |
| 17 | Stillbirth | 2 | 1363 | 108 | 163224 | 0.2280294 | 0.5235032 | Reproduction |
| 3 | Foot angle | 1 | 672 | 108 | 163224 | 0.3596272 | 0.6162877 | Exterior |
| 7 | Milk capric acid content | 1 | 912 | 108 | 163224 | 0.4541067 | 0.6162877 | Milk |
| 8 | Milk myristic acid content | 1 | 902 | 108 | 163224 | 0.4504612 | 0.6162877 | Milk |
| 13 | Net merit | 1 | 903 | 108 | 163224 | 0.4508269 | 0.6162877 | Production |
| 19 | Udder depth | 1 | 695 | 108 | 163224 | 0.3693423 | 0.6162877 | Exterior |
| 16 | Somatic cell score | 1 | 1122 | 108 | 163224 | 0.5253603 | 0.6654564 | Health |
| 2 | Conception rate | 1 | 1255 | 108 | 163224 | 0.5656375 | 0.6716945 | Reproduction |
| 1 | Calving ease | 2 | 3819 | 108 | 163224 | 0.7219243 | 0.7776744 | Reproduction |
| 4 | Length of productive life | 1 | 2004 | 108 | 163224 | 0.7367441 | 0.7776744 | Production |
| 11 | Milk yield | 1 | 6432 | 108 | 163224 | 0.9870080 | 0.9870080 | Milk |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Exterior | 4 | 9077 | 108 | 163224 | 0.8569775 | 0.9999953 |
| 2 | Health | 1 | 5889 | 108 | 163224 | 0.9811250 | 0.9999953 |
| 3 | Meat and Carcass | 3 | 18258 | 108 | 163224 | 0.9997109 | 0.9999953 |
| 4 | Milk | 91 | 75352 | 108 | 163224 | 0.0000000 | 0.0000000 |
| 5 | Production | 2 | 19640 | 108 | 163224 | 0.9999848 | 0.9999953 |
| 6 | Reproduction | 7 | 35008 | 108 | 163224 | 0.9999953 | 0.9999953 |
online version: https://biit.cs.ut.ee/gprofiler/gost
I got a script from Julia Rodrigues about the R package GPROFILER to perform an enrichment of my Genes.
### enriquecimento genico
#install.packages("gprofiler2")
library(gprofiler2)
#Para conferir a lista de organism -> https://biit.cs.ut.ee/gprofiler/page/organism-list
#Obs: eu entro com os ids ENSOAR...
query <- read.table ("/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt", header = T)
query <- query[,c("gene_id")]
gene_enrich <- gost(
query,
organism = "btaurus",
ordered_query = FALSE,
multi_query = FALSE,
significant = TRUE,
exclude_iea = FALSE,
measure_underrepresentation = FALSE,
evcodes = TRUE,
user_threshold = 0.05,
correction_method = c("fdr"),
domain_scope = c("annotated"),
numeric_ns = "",
sources = NULL,
as_short_link = FALSE,
highlight = FALSE
)
#str(gene_enrich) # para ver o formato dos meus dados
#selecionando apenas as informações da lista que me interessam para fazer meu data.frame
result_enrich <- data.frame(gene_enrich$result)
result_enrich <- data.frame(Category = result_enrich$source,
ID = result_enrich$term_id,
Term = result_enrich$term_name,
adj_pvalue = result_enrich$p_value,
id_ensembl = result_enrich$intersection)
write.table(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=F)
write.csv(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.csv")
Below we can se the significant terms of this enrichment (output):
| X | Category | ID | Term | adj_pvalue | id_ensembl |
|---|---|---|---|---|---|
| 1 | GO:BP | GO:0006538 | glutamate catabolic process | 0.0383792 | ENSBTAG00000007540 |
| 2 | GO:BP | GO:0003274 | endocardial cushion fusion | 0.0383792 | ENSBTAG00000019832 |
| 3 | GO:BP | GO:1905005 | regulation of epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0383792 | ENSBTAG00000019832 |
| 4 | GO:BP | GO:1905007 | positive regulation of epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0383792 | ENSBTAG00000019832 |
| 5 | GO:BP | GO:0003430 | growth plate cartilage chondrocyte growth | 0.0383792 | ENSBTAG00000019832 |
| 6 | GO:BP | GO:0003431 | growth plate cartilage chondrocyte development | 0.0383792 | ENSBTAG00000019832 |
| 7 | GO:BP | GO:0003433 | chondrocyte development involved in endochondral bone morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 8 | GO:BP | GO:0003415 | chondrocyte hypertrophy | 0.0383792 | ENSBTAG00000019832 |
| 9 | GO:BP | GO:0048631 | regulation of skeletal muscle tissue growth | 0.0383792 | ENSBTAG00000011743 |
| 10 | GO:BP | GO:0051138 | positive regulation of NK T cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 11 | GO:BP | GO:0062043 | positive regulation of cardiac epithelial to mesenchymal transition | 0.0383792 | ENSBTAG00000019832 |
| 12 | GO:BP | GO:0051136 | regulation of NK T cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 13 | GO:BP | GO:0003186 | tricuspid valve morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 14 | GO:BP | GO:0062042 | regulation of cardiac epithelial to mesenchymal transition | 0.0383792 | ENSBTAG00000019832 |
| 15 | GO:BP | GO:0060434 | bronchus morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 16 | GO:BP | GO:0060440 | trachea formation | 0.0383792 | ENSBTAG00000019832 |
| 17 | GO:BP | GO:0060462 | lung lobe development | 0.0383792 | ENSBTAG00000019832 |
| 18 | GO:BP | GO:0060463 | lung lobe morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 19 | GO:BP | GO:0048632 | negative regulation of skeletal muscle tissue growth | 0.0383792 | ENSBTAG00000011743 |
| 20 | GO:BP | GO:0003175 | tricuspid valve development | 0.0383792 | ENSBTAG00000019832 |
| 21 | GO:BP | GO:0061520 | Langerhans cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 22 | GO:BP | GO:1905317 | inferior endocardial cushion morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 23 | GO:BP | GO:2000563 | positive regulation of CD4-positive, alpha-beta T cell proliferation | 0.0383792 | ENSBTAG00000019832 |
| 24 | GO:BP | GO:1990428 | miRNA transport | 0.0383792 | ENSBTAG00000019832 |
| 25 | GO:BP | GO:0002513 | tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 26 | GO:BP | GO:0002514 | B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 27 | GO:BP | GO:0003149 | membranous septum morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 28 | GO:BP | GO:1990086 | lens fiber cell apoptotic process | 0.0383792 | ENSBTAG00000019832 |
| 29 | GO:BP | GO:0002520 | immune system development | 0.0383792 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 30 | GO:BP | GO:0002666 | positive regulation of T cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 31 | GO:BP | GO:0002651 | positive regulation of tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 32 | GO:BP | GO:0061343 | cell adhesion involved in heart morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 33 | GO:BP | GO:0002649 | regulation of tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 34 | GO:BP | GO:0002661 | regulation of B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 35 | GO:BP | GO:0002664 | regulation of T cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 36 | GO:BP | GO:0002663 | positive regulation of B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 37 | GO:BP | GO:0002684 | positive regulation of immune system process | 0.0428818 | ENSBTAG00000019832,ENSBTAG00000019872,ENSBTAG00000019194 |
| 38 | GO:BP | GO:0048630 | skeletal muscle tissue growth | 0.0431888 | ENSBTAG00000011743 |
| 39 | GO:BP | GO:0051251 | positive regulation of lymphocyte activation | 0.0431888 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 40 | GO:BP | GO:0034154 | toll-like receptor 7 signaling pathway | 0.0431888 | ENSBTAG00000019872 |
| 41 | GO:BP | GO:0060439 | trachea morphogenesis | 0.0431888 | ENSBTAG00000019832 |
| 42 | GO:BP | GO:0002517 | T cell tolerance induction | 0.0431888 | ENSBTAG00000019832 |
| 43 | GO:BP | GO:0002645 | positive regulation of tolerance induction | 0.0431888 | ENSBTAG00000019832 |
| 44 | GO:BP | GO:0060433 | bronchus development | 0.0460331 | ENSBTAG00000019832 |
| 45 | GO:BP | GO:0003418 | growth plate cartilage chondrocyte differentiation | 0.0460331 | ENSBTAG00000019832 |
| 46 | GO:BP | GO:0002696 | positive regulation of leukocyte activation | 0.0482941 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 47 | GO:BP | GO:0001865 | NK T cell differentiation | 0.0489635 | ENSBTAG00000019832 |
| 48 | GO:BP | GO:0050867 | positive regulation of cell activation | 0.0496277 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 49 | GO:BP | GO:0003198 | epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0496277 | ENSBTAG00000019832 |
| 50 | GO:BP | GO:0002643 | regulation of tolerance induction | 0.0496277 | ENSBTAG00000019832 |
| 51 | GO:BP | GO:2001034 | positive regulation of double-strand break repair via nonhomologous end joining | 0.0496277 | ENSBTAG00000019194 |
| 52 | GO:MF | GO:0004352 | glutamate dehydrogenase (NAD+) activity | 0.0098128 | ENSBTAG00000007540 |
| 53 | GO:MF | GO:0004353 | glutamate dehydrogenase [NAD(P)+] activity | 0.0098128 | ENSBTAG00000007540 |
| 54 | GO:MF | GO:0016639 | oxidoreductase activity, acting on the CH-NH2 group of donors, NAD or NADP as acceptor | 0.0098128 | ENSBTAG00000007540 |
| 55 | GO:MF | GO:0005026 | transforming growth factor beta receptor activity, type II | 0.0147171 | ENSBTAG00000019832 |
| 56 | GO:MF | GO:0048185 | activin binding | 0.0326677 | ENSBTAG00000019832 |
| 57 | GO:MF | GO:0036312 | phosphatidylinositol 3-kinase regulatory subunit binding | 0.0326677 | ENSBTAG00000019872 |
| 58 | GO:MF | GO:0034713 | type I transforming growth factor beta receptor binding | 0.0326677 | ENSBTAG00000019832 |
| 59 | GO:MF | GO:0017002 | activin receptor activity | 0.0326677 | ENSBTAG00000019832 |
| 60 | GO:MF | GO:0005024 | transforming growth factor beta receptor activity | 0.0326677 | ENSBTAG00000019832 |
| 61 | GO:MF | GO:0050431 | transforming growth factor beta binding | 0.0461234 | ENSBTAG00000019832 |
| 62 | GO:MF | GO:0043548 | phosphatidylinositol 3-kinase binding | 0.0461234 | ENSBTAG00000019872 |
| 63 | GO:MF | GO:0016638 | oxidoreductase activity, acting on the CH-NH2 group of donors | 0.0461234 | ENSBTAG00000007540 |
| 64 | GO:MF | GO:0005160 | transforming growth factor beta receptor binding | 0.0461234 | ENSBTAG00000019832 |
| 65 | GO:MF | GO:0004675 | transmembrane receptor protein serine/threonine kinase activity | 0.0461234 | ENSBTAG00000019832 |
| 66 | REAC | REAC:R-BTA-2243919 | Crosslinking of collagen fibrils | 0.0440084 | ENSBTAG00000011743 |
| 67 | REAC | REAC:R-BTA-2173788 | Downregulation of TGF-beta receptor signaling | 0.0440084 | ENSBTAG00000019832 |
| 68 | REAC | REAC:R-BTA-8964539 | Glutamate and glutamine metabolism | 0.0440084 | ENSBTAG00000007540 |
| 69 | REAC | REAC:R-BTA-112308 | Presynaptic depolarization and calcium channel opening | 0.0440084 | ENSBTAG00000013117 |
| 70 | REAC | REAC:R-BTA-2151201 | Transcriptional activation of mitochondrial biogenesis | 0.0440084 | ENSBTAG00000007540 |
| 71 | REAC | REAC:R-BTA-2173791 | TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition) | 0.0440084 | ENSBTAG00000019832 |
From the online version of GPROFILER i got the following results.
Legend
Additionaly I perfomed a another version of this analysis but with no eletronic GO annotation
Now I’m gonna make a pie chart with the categories
library(ggplot2)
# Assuming sig_enrich$Category contains the categories
category_counts <- table(result_enrich$Category)
# Create a pie chart
pie_chart <- ggplot(data = NULL, aes(x = factor(1), fill = names(category_counts), y = as.numeric(category_counts))) +
geom_bar(stat = "identity", width = 1) +
coord_polar(theta = "y") +
labs(title = "Distribution among terms",
x = NULL,
y = NULL,
fill = "Category") +
theme_void() +
theme(legend.position = "right")
# Print the pie chart
print(pie_chart)
First I need to generate the .ped file
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
plink \
--bfile ${DATA} \
--cow \
--recode \
--out ${RESULT}
Second, is necessary to convert the .ped file to Linkage format:
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in
plink \
--file ${DATA} \
--cow \
--recode HV \
--out ${RESULT}
The code above will generate a pair of files .ped and .info for each chromosome
I ran the haploview only for chromosome 11, 22, 26, 28 that are when the significant SNP are located.
As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot
| Chr | SNP | bp | A1 | A2 | Freq | Haploview.order |
|---|---|---|---|---|---|---|
| 11 | BTB-01060598 | 32301594 | C | A | 0.4503970 | 680 |
| 22 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.1865080 | 88 |
| 22 | ARS-BFGL-NGS-26419 | 46052082 | A | C | 0.0813492 | 762 |
| 26 | BTB-00926636 | 17857480 | C | A | 0.0257937 | 296 |
| 26 | BTA-62125-no-rs | 17899619 | C | A | 0.0257937 | 297 |
| 28 | BTA-99379-no-rs | 41666186 | A | C | 0.0496032 | 701 |
The two significant SNPs from chromosome 26 fell within a Haploblock.
Chr 11 (SNP 680)
Chr 22 (SNP 88)
Chr 22 (SNP 762)
Chr 26 (SNP 296 and 297)
Chr 28 (SNP 701)
As we found an haploblock on chromosome 26 I decided take a look inside this block. So, on Haploview I checked the name and position of the first SNP of this block (292) and the last (300).
| X. | Name | Position | ObsHET | PredHET | HWpval | X.Geno | FamTrio | MendErr | MAF | Alleles |
|---|---|---|---|---|---|---|---|---|---|---|
| 292 | ARS-BFGL-NGS-111577 | 17714604 | 0.401 | 0.448 | 0.1161 | 100 | 0 | 0 | 0.339 | A:C |
| 293 | ARS-BFGL-NGS-21408 | 17746468 | 0.369 | 0.403 | 0.2260 | 100 | 0 | 0 | 0.280 | C:A |
| 294 | ARS-BFGL-NGS-16328 | 17778908 | 0.056 | 0.054 | 1.0000 | 100 | 0 | 0 | 0.028 | A:C |
| 295 | ARS-BFGL-NGS-117063 | 17817225 | 0.413 | 0.452 | 0.2022 | 100 | 0 | 0 | 0.345 | C:A |
| 296 | BTB-00926636 | 17857480 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | A:C |
| 297 | BTA-62125-no-rs | 17899619 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | A:C |
| 298 | ARS-BFGL-NGS-110679 | 17953795 | 0.397 | 0.433 | 0.2211 | 100 | 0 | 0 | 0.317 | A:C |
| 299 | ARS-BFGL-NGS-72889 | 17989430 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | C:A |
| 300 | BTB-00926868 | 18040673 | 0.385 | 0.420 | 0.2319 | 100 | 0 | 0 | 0.300 | A:C |
Then I checked on NCBI’s Genome Data Viewer which genes are inside this block, adding 50k before the first SNP and 50K after the last SNP.
This interval has 426,069bp and has this genes inside:
As you can see in the image below:
The genes TLL2, TM9SF3 and PIK3AP1 already were in the gene list from GALLO, but the genes DNTT and OPALIN were new!
I performed a GPROFILER analysis only for these genes spotted in this Block 10.